# Start from an empty workspace: remove every object in the current
# environment. NOTE(review): this does not detach packages or reset options,
# so it is not a substitute for running the script in a fresh R session.
rm(list = ls())

# load packages
library(ggplot2)
library(openxlsx)

# Read the first worksheet of the metadata workbook, which holds the keyword
# counts.
dat <- openxlsx::read.xlsx("vsberichte_metadata.xlsx", sheet = 1)

# Binary variable indicating whether the interior minister is from a
# center-right party: 1 for "cdu", "csu" or "fdp"; NA for "pro" (and for a
# missing party code); 0 for every other party code.
dat$cr_intmin <- ifelse(
  dat$intmin_party %in% c("cdu", "csu", "fdp"),
  1,
  ifelse(dat$intmin_party == "pro", NA, 0)
)

# Sum both keyword counts within each year x minister-group cell. With
# aggregate()'s default na.action, rows that are NA in any formula variable
# (including the cr_intmin values set to NA above) are dropped.
dat_agg <- aggregate(
  cbind(count_right_kw, count_left_kw) ~ year + cr_intmin,
  data = dat,
  FUN = sum
)

# Logged ratio of right to left keyword counts. 0.5 is added to both counts
# before taking logs so that a count of zero still yields a finite value;
# values above 0 mean the right-keyword count is the larger one.
dat_agg$ratio_log <- with(
  dat_agg,
  log(count_right_kw + 0.5) - log(count_left_kw + 0.5)
)

# Legend settings shared by the colour, fill and shape scales; giving all
# three the same title and labels lets ggplot2 draw them as a single legend.
# The first entry of each vector applies to cr_intmin == 0, the second to
# cr_intmin == 1.
legend_title <- "Interior Minister Party"
legend_labels <- c("Center-Left", "Center-Right")
party_colors <- c("firebrick", "dodgerblue")

# Plot logged ratios over time, one point/line series per minister group.
# The dashed horizontal line at 0 marks equal (smoothed) keyword counts.
ggplot(
  data = dat_agg,
  mapping = aes(
    x = year,
    y = ratio_log,
    color = factor(cr_intmin),
    fill = factor(cr_intmin),
    shape = factor(cr_intmin)
  )
) +
  geom_point(size = 2.5) +
  #  stat_smooth() +
  geom_line() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_color_manual(
    values = party_colors,
    name = legend_title,
    labels = legend_labels
  ) +
  scale_fill_manual(
    values = party_colors,
    name = legend_title,
    labels = legend_labels
  ) +
  scale_shape_manual(
    values = c(16, 17),
    name = legend_title,
    labels = legend_labels
  ) +
  scale_x_continuous(breaks = seq(1970, 2025, 10)) +
  labs(x = "", y = "Logged Ratio of Keyword Count") +
  theme_classic() +
  theme(legend.position = "bottom", text = element_text(size = 16))

# No plot object is passed, so ggsave() writes the most recent ggplot
# (the figure built above) as a 7 x 5 PDF (ggsave's default unit is inches).
ggsave("FigD4_a.pdf", width = 7, height = 5)

